* ------------------------------------------------------------------
* Session setup: empty memory, define project folders, open the log
* ------------------------------------------------------------------
clear all
set more off

* Project folders (referenced below as $data, $in and $figures)
global data    "R:\SharedProjects\Shared2020-070\2016\extend_to_2020\JPE_Replication_dta"
global in      "R:\SharedProjects\Shared2020-070\2016\input"
global figures "R:\SharedProjects\Shared2020-070\2016\extend_to_2020\JPE_Replication_log"

* Close any log left open by an earlier run, then start a fresh text log
capture log close
log using "$figures\D_file", replace text

** new
clear
set maxvar 30000

* Load the F831 application/income file and attach the HRS extract (temp1),
* keeping only persons that appear in both files.
* m:1 requires temp1 to hold exactly one row per hhidpn.
cd "$data"
use "f831_and_inc.dta", clear
merge m:1 hhidpn using "$data\temp1", keep(match) nogenerate

						
* Earnings clean-up, in two steps:
*   1) missing earnings are set to 0 (for earn, the .m code as well);
*   2) earnings and experience are then set back to missing for every year
*      later than one year after the recorded year of death.
replace w2earn = 0 if w2earn==.
replace earn   = 0 if earn==.|earn==.m

foreach v in w2earn earn experience experience_alt {
	replace `v' = . if year>year_died+1 & year_died!=.
}

* Distance (in days) between each of the 15 interview dates and the application
* date. Only interviews from 2*31 days before to 12*31 days after the application
* are eligible; eligible distances are stored as absolute values, and row_min is
* the smallest eligible distance for the row.
gen wave_appl = .
forvalues w = 1/15 {
	generate dist`w' = interview_date`w' - date_app
	replace  dist`w' = . if !inrange(dist`w', -2*31, 12*31)
	replace  dist`w' = abs(dist`w')
}
egen row_min = rowmin(dist1-dist15)
drop ragey* riwend*

********************************************************************************************************************************************************************

* ---------------------------------------------------------------------------
* <name>_appl variables: value of each HRS item at the interview whose distance
* to the application equals row_min. If several waves tie, the highest wave
* number wins because waves are processed in ascending order.
* ---------------------------------------------------------------------------

* 1) empty placeholders (creation order kept as in the list below)
foreach v in shlt hlthlm jcocc jcten jcind jlocc jlten jlind mstat hibp diab cancr lung heart strok psych arthr ///
             hosp hspnit walkra dressa stoopa bmi beda shopa mealsa hlthlm_direct hlth_temp cantwork proxy ///
             jcoccb jcoccc jloccb jloccc jlasty lbrf doctim oopmd oopmdo oopmdsp oopmdosp totmd higov govmr govmd govva mrprem prpcnt ///
             jyears jphys jlift jstoop jsight jstres hospcov nurscov surgcov doctcov dentcov drugcov homecov helpcov gisp pisp ///
             prprm1 prprm2 prprm3 hiothp henum atotb atotf astck achck acd abond pmbmi iwstat itot issdi isdi issi interview_date inlbrf inw covr {
	quietly generate `v'_appl = .
}

* 2) source items grouped by how the wave-specific variable is named
* stored as r<name><wave>
local r_items shlt hlthlm jcocc jcten jcind jlocc jlten jlind mstat hibp diab cancr lung heart strok psych arthr ///
              hosp hspnit walkra dressa stoopa bmi beda shopa mealsa hlthlm_direct hlth_temp cantwork proxy ///
              jcoccb jcoccc jloccb jloccc jlasty lbrf doctim oopmd oopmdo oopmdsp oopmdosp totmd higov govmr govmd govva mrprem prpcnt ///
              jyears jphys jlift jstoop jsight jstres gisp pisp ///
              prprm1 prprm2 prprm3 hiothp henum pmbmi iwstat issdi isdi issi inlbrf covr
* stored as <name><wave>
local plain_items hospcov nurscov surgcov doctcov dentcov drugcov homecov helpcov
* stored as h<name><wave>
local h_items atotb astck achck acd abond itot atotf

* 3) copy the values from the closest wave
forvalues w = 1/15 {
	local closest dist`w'==row_min & row_min!=.
	foreach v of local r_items {
		quietly replace `v'_appl = r`v'`w' if `closest'
	}
	foreach v of local plain_items {
		quietly replace `v'_appl = `v'`w' if `closest'
	}
	foreach v of local h_items {
		quietly replace `v'_appl = h`v'`w' if `closest'
	}
	quietly replace wave_appl           = `w'                  if `closest'
	quietly replace inw_appl            = inw`w'               if `closest'
	quietly replace interview_date_appl = interview_date`w'    if `closest'
}

* 4) fold the "disputes previous report" codes into plain 0/1
foreach v in hibp diab cancr lung heart strok psych arthr {
	quietly replace `v'_appl = 1 if `v'_appl == 3 //disputes previous report and now has condition
	quietly replace `v'_appl = 0 if `v'_appl == 4 //disputes previous report and now does not
}
********************************************************************************************************************************************************************


********************************************************************************************************************************************************************
* ---------------------------------------------------------------------------
* <name>_intvw variables: value of each HRS item taken from the wave whose
* interview falls in the same calendar year as the row (iy<wave> == year).
* ---------------------------------------------------------------------------

* 1) empty placeholders (creation order kept as in the list below)
foreach v in shlt hlthlm jcocc jcten jcind jlocc jlten jlind mstat hibp diab cancr lung heart strok psych arthr ///
             hosp hspnit walkra dressa stoopa bmi beda shopa mealsa hlthlm_direct hlth_temp cantwork proxy ///
             jcoccb jcoccc jloccb jloccc jlasty lbrf doctim oopmd oopmdo oopmdsp oopmdosp totmd higov govmr govmd govva mrprem prpcnt ///
             jyears jphys jlift jstoop jsight jstres hospcov nurscov surgcov doctcov dentcov drugcov homecov helpcov gisp pisp ///
             prprm1 prprm2 prprm3 hiothp henum atotb atotf astck achck acd abond pmbmi iwstat itot issdi isdi issi interview_date inlbrf inw covr {
	quietly generate `v'_intvw = .
}

* 2) source items grouped by how the wave-specific variable is named
* stored as r<name><wave>
local r_items shlt hlthlm jcocc jcten jcind jlocc jlten jlind mstat hibp diab cancr lung heart strok psych arthr ///
              hosp hspnit walkra dressa stoopa bmi beda shopa mealsa hlthlm_direct hlth_temp cantwork proxy ///
              jcoccb jcoccc jloccb jloccc jlasty lbrf doctim oopmd oopmdo oopmdsp oopmdosp totmd higov govmr govmd govva mrprem prpcnt ///
              jyears jphys jlift jstoop jsight jstres gisp pisp ///
              prprm1 prprm2 prprm3 hiothp henum pmbmi iwstat issdi isdi issi inlbrf covr
* stored as <name><wave>
local plain_items hospcov nurscov surgcov doctcov dentcov drugcov homecov helpcov
* stored as h<name><wave>
local h_items atotb astck achck acd abond itot atotf

* 3) calendar year of each interview, then copy values where it equals the row year
forvalues w = 1/15 {
	quietly generate iy`w' = year(interview_date`w')
	foreach v of local r_items {
		quietly replace `v'_intvw = r`v'`w' if iy`w'==year
	}
	foreach v of local plain_items {
		quietly replace `v'_intvw = `v'`w' if iy`w'==year
	}
	foreach v of local h_items {
		quietly replace `v'_intvw = h`v'`w' if iy`w'==year
	}
	quietly replace inw_intvw            = inw`w'            if iy`w'==year
	quietly replace interview_date_intvw = interview_date`w' if iy`w'==year
}

* 4) fold the "disputes previous report" codes into plain 0/1
foreach v in hibp diab cancr lung heart strok psych arthr {
	quietly replace `v'_intvw = 1 if `v'_intvw == 3 //disputes previous report and now has condition
	quietly replace `v'_intvw = 0 if `v'_intvw == 4 //disputes previous report and now does not
}
********************************************************************************************************************************************************************

* Rename the application-level variables to descriptive names.
* The list is read as consecutive (old new) pairs and applied in order; pairs
* with identical old and new names are kept exactly as in the original script.
local pairs shlt_appl hlt_appl ///
            hlthlm_appl hlthlm_appl ///
            interview_date_appl intvw_dt_appl ///
            interview_date_intvw intvw_dt_intvw ///
            jcocc_appl curr_occ_appl ///
            jcten_appl curr_tenure_appl ///
            jcind_appl curr_ind_appl ///
            jlocc_appl longest_occ_appl ///
            jlten_appl longest_tenure_appl ///
            jlind_appl longest_ind_appl ///
            mstat_appl mar_stat_at_appl ///
            hlthlm_direct_appl hlthlm_direct_appl ///
            hlth_temp_appl hlth_temp_appl ///
            cantwork_appl cantwork_appl ///
            proxy_appl proxy_appl ///
            jcoccb_appl curr_occ_b_appl ///
            jcoccc_appl curr_occ_c_appl ///
            jloccb_appl longest_occ_b_appl ///
            jloccc_appl longest_occ_c_appl ///
            jlasty_appl last_lfp_yr_appl ///
            lbrf_appl lfp_status_appl ///
            doctim_appl doc_visits_appl ///
            row_min days_to_interview_appl
while "`pairs'" != "" {
	gettoken old pairs : pairs
	gettoken new pairs : pairs
	rename `old' `new'
}

label variable days_to_interview_appl "Days between interview and application"

* Attach the occupation and labour-force value labels
foreach stem in curr_occ longest_occ {
	label values `stem'_appl   OCCUP
	label values `stem'_b_appl OCCUPB
	label values `stem'_c_appl OCCUPC
}
label values lfp_status_appl LBRF


* Flag rows for which an eligible interview was found around the application
gen within_threshold_appl = 0
replace within_threshold_appl = 1 if !(days_to_interview_appl == .)

* Single self-reported condition indicator (bs_sf_*), built separately for the
* application-level and the interview-year values:
*   0 when all eight conditions are reported as 0; otherwise the code of the
*   last condition in the list below that equals 1 (later entries overwrite earlier ones).
local conditions hibp psych heart arthr diab lung strok cancr
foreach lvl in appl intvw {
	gen bs_sf_`lvl' = .
	replace bs_sf_`lvl'=0 if hibp_`lvl'==0 & psych_`lvl'==0 & heart_`lvl'==0 & arthr_`lvl'==0  & diab_`lvl'==0 & lung_`lvl'==0 & strok_`lvl'==0 & cancr_`lvl'==0
	local code = 0
	foreach c of local conditions {
		local ++code
		replace bs_sf_`lvl'=`code' if `c'_`lvl'==1
	}
	label def bs_sf  0 "None" 1 "High BP" 2 "Psych.cond." 3 "Heart" 4 "Arthritis" 5 "Diabetis" ///
					6 "Lung disease" 7 "Stroke" 8 "Cancer" ,replace
	label values bs_sf_`lvl' bs_sf
}



* ---------------------------------------------------------------------------
* Year-level HRS values: for every row, take each item from the wave whose
* interview year is closest (in absolute calendar years) to the row's year.
* `wave' records which wave was used. If several waves tie, the highest wave
* number wins because waves are processed in ascending order.
* ---------------------------------------------------------------------------
gen wave = .

* items to carry over; each is stored per wave as <name><wave>
local yr_items rissdi rissi risdi rshlt rhlthlm interview_date rmstat rhibp rdiab rcancr rlung rheart rstrok rpsych rarthr ///
               rhosp rhspnit rwalkra rdressa rstoopa rbmi rbeda rshopa rmealsa rhlthlm_direct rhlth_temp rcantwork rproxy ///
               rjcocc rjcoccb rjcoccc rjlocc rjloccb rjloccc rjlasty rlbrf rdoctim roopmd roopmdo rtotmd rhigov rgovmr rgovmd rgovva rmrprem rprpcnt ///
               rprprm1 rprprm2 rprprm3 rhiothp rhenum hatotb hastck hachck hacd habond
foreach v of local yr_items {
	gen `v'=.
}

forvalues j=1(1)15	{
	g ddist`j'=(year(interview_date`j')-year)
	* NOTE(review): ddist is a difference in calendar YEARS, but the bounds below
	* (-2*31 and 12*31) are the day-based bounds used for the application-level
	* match. As written they only exclude gaps beyond 62 / 372 years, so in
	* practice every interview is eligible. Kept unchanged to preserve results;
	* confirm the intended year window.
	replace ddist`j'=. if ddist`j' < -2*31 | ddist`j' > 12*31
	replace ddist`j' = abs(ddist`j')
	}
egen row_min = rowmin(ddist1-ddist15)

forvalues j=1(1)15	{
	foreach v of local yr_items {
		replace `v' = `v'`j' if ddist`j'==row_min & row_min!=.
	}
	* wave does not depend on the item, so it is set once per wave rather than
	* once per item as before (same result, far fewer passes over the data)
	replace wave = `j' if ddist`j'==row_min & row_min!=.
}

* Rename the year-level variables to descriptive names.
* The list is read as consecutive (old new) pairs and applied in order.
* Per the original notes: longest_occ uses the 1980 census definition,
* longest_occ_b the 2000 census definition, longest_occ_c the 2010 one.
local pairs rhlthlm hlthlm ///
            interview_date intvw_dt ///
            rmstat marital_status ///
            rissdi received_ssidi ///
            rissi received_ssi ///
            risdi received_di ///
            rhlthlm_direct hlthlm_direct ///
            rhlth_temp hlth_temp ///
            rcantwork cantwork ///
            rproxy proxy ///
            roopmd oopmd ///
            roopmdo oopmdo ///
            rjcocc curr_occ ///
            rjcoccb curr_occ_b ///
            rjcoccc curr_occ_c ///
            rjlocc longest_occ ///
            rjloccb longest_occ_b ///
            rjloccc longest_occ_c ///
            rjlasty last_lfp_yr ///
            rlbrf lfp_status ///
            rdoctim doc_visits
while "`pairs'" != "" {
	gettoken old pairs : pairs
	gettoken new pairs : pairs
	rename `old' `new'
}

* Attach the occupation and labour-force value labels
foreach stem in curr_occ longest_occ {
	label values `stem'   OCCUP
	label values `stem'_b OCCUPB
	label values `stem'_c OCCUPC
}
label values lfp_status LBRF


* Strip the r prefix from the year-level health items
foreach v in hibp diab cancr lung heart strok psych arthr hosp hspnit walkra dressa stoopa bmi beda shopa mealsa {
	rename r`v' `v'
}

* Fold the "disputes previous report" codes into plain 0/1
local conditions hibp diab cancr lung heart strok psych arthr
foreach c of local conditions {
	replace `c' = 1 if `c' == 3 //disputes previous report and now has condition
	replace `c' = 0 if `c' == 4 //disputes previous report and now does not
}

* Year-level condition indicator: 0 when all eight conditions are 0, otherwise
* the code of the last condition in the priority list that equals 1.
* Uses the value label bs_sf, which must already be defined at this point.
gen bs_sf = .
replace bs_sf=0 if hibp==0 & psych==0 & heart==0 & arthr==0  & diab==0 & lung==0 & strok==0 & cancr==0
local code = 0
foreach c in hibp psych heart arthr diab lung strok cancr {
	local ++code
	replace bs_sf=`code' if `c'==1
}
label values bs_sf bs_sf

rename row_min yrs_to_interview

* Flag rows whose closest interview is at most one calendar year away.
* yrs_to_interview is an absolute difference, so this covers both directions;
* a missing value is never < 2, so no explicit missing check is needed.
gen within_threshold = 0
replace within_threshold = 1 if yrs_to_interview < 2

* Remove the wave-specific source variables that are no longer needed
drop riss* risdi* rshl* rhl* interview_date* dist* rjc* rjl* rmst* riwstat* rcantwork* ///
     rhibp* rdiab* rcancr* rlung* rheart* rstrok* rpsych* rarthr* ///
     rhosp* rhspnit* rwalkra* rdressa* rstoopa* rbmi* rbeda* rshopa* rmealsa* ///
     rproxy* rlbrf* rdoctim* roopmd* roopmdo*

* BMI flags at the year level. The "if bmi < 300" qualifier leaves the flag
* missing when bmi is missing (the original notes say no one has a bmi over 300).
gen obese   = (bmi >= 30)  if bmi < 300
gen underwt = (bmi < 18.5) if bmi < 300

* Calendar year of the interview, filled only when an interview took place in the row's year
gen interview_year=.
forvalues w = 1/15 {
	replace interview_year = iy`w' if iy`w'==year
}

* Same BMI flags for the application-level and interview-year BMI values
foreach lvl in appl intvw {
	gen obese_`lvl'   = (bmi_`lvl' >= 30)  if bmi_`lvl' < 300
	gen underwt_`lvl' = (bmi_`lvl' < 18.5) if bmi_`lvl' < 300
}


* Make education constant within person: use the person's modal raeduc; when
* egen's mode() returns missing, fall back to the first value within the
* hhidpn/mraeduc group after sorting on raeduc.
sort hhidpn raeduc, stable
by hhidpn (raeduc): egen mraeduc = mode(raeduc)
sort hhidpn mraeduc raeduc, stable
by hhidpn mraeduc (raeduc): replace mraeduc = raeduc[1] if mraeduc==.
replace raeduc = mraeduc
drop mraeduc

* Age in the row's year; afterwards rabyear takes over the name dob_y
gen age = year - rabyear
drop rabdate dob_y
rename rabyear dob_y

sort hhidpn record year rid al, stable

* Collapse the bs codes: 2, 7, 8, 10, 19, 20 and 99 become 16 ("Other"),
* while 5 and 6 become 15
replace bs=16 if inlist(bs, 2, 7, 8, 10, 19, 20, 99)	/*"Other disability"*/
replace bs=15 if inlist(bs, 5, 6)

label def bs 	1 "Musculoskeletal" 3 "Respiratory" 4 "Cardiov." 15 "Dig. \& Urin." 16 "Other" ///
				9 "Endocrine" 11 "Neurol." 12 "Mental dis." 13 "Cancer" 14 "Immune def.",replace
label values bs bs

* Education dummy: raeduc 1-3 -> 0, raeduc 4-5 -> 1, anything else stays missing
gen college = .
replace college = 0 if inlist(raeduc, 1, 2, 3)
replace college = 1 if inlist(raeduc, 4, 5)
label def college 0 "At most High School degree" 1 "Some college or more"
label values college college

* Race dummy. Three persons are first set to raracem 3 when their code is
* above 3 (missing counts as above 3); per the original notes this comes
* from confidential race data.
replace raracem=3 if raracem>3 & inlist(hhidpn, 521804020, 909765010, 902295020)		/*From confidential race data*/

gen white =.
replace white = 1 if raracem==1
replace white = 0 if inlist(raracem, 2, 3)
label def white 1 "White" 0 "Non-white"
label values white white

* Gender dummy: ragender 2 -> 1, ragender 1 -> 0
gen female=.
replace female=1 if ragender==2
replace female=0 if ragender==1
label define female 1 "female" 0 "male"
label values female female


* Married / widowed dummies at three levels:
*   cal   - calendar-year value      (marital_status,    no suffix)
*   appl  - application-level value  (mar_stat_at_appl,  suffix _appl)
*   intvw - interview-year value     (mar_stat_at_intvw, suffix _intvw)
* Status code 1 defines married and code 7 defines widowed; each dummy is
* missing when the underlying status is missing.
rename mstat_intvw mar_stat_at_intvw

foreach lvl in cal appl intvw {
	if "`lvl'" == "cal" {
		local src marital_status
		local sfx
	}
	else {
		local src mar_stat_at_`lvl'
		local sfx _`lvl'
	}

	gen married`sfx'=.
	replace married`sfx'=1 if `src'==1
	replace married`sfx'=0 if `src'!=1 & `src'!=.
	label define married`sfx' 1 "married" 0 "unmarried"
	label values married`sfx' married`sfx'

	gen widowed`sfx'=.
	replace widowed`sfx'=1 if `src'==7
	replace widowed`sfx'=0 if `src'!=7 & `src'!=.
	label define widowed`sfx' 1 "widowed" 0 "not widowed"
	label values widowed`sfx' widowed`sfx'
}


* SSI application dummy: rid 16 -> 1; rid 2 or 216 -> 0; otherwise missing
gen ssi=.
replace ssi=1 if rid==16
replace ssi=0 if inlist(rid, 2, 216)
label variable ssi "SSI"

* Variable labels
label variable experience     "Experience using y"
label variable experience_alt "Experience using ssa+w2"
label variable success        "Awarded"
label variable nosuccess      "Not awarded"
label variable bs_sf_intvw    "Health cond. reported to HRS"
label variable bs_sf_appl     "Health cond. reported to HRS"

* ---------------------------------------------------------------------------
* CPI deflation and household income
* ---------------------------------------------------------------------------
* Attach the CPI by calendar year; CPI years without data rows are not kept
merge m:1 year using "$in\cpi.dta", keep(master match) nogenerate

* Household income is built FIRST, while nominal w2earn and earn still exist.
* Previously this step ran after both had been dropped, so
* "keep w2earn earn ..." could only resolve (through variable-name
* abbreviation) to w2earn_real / earn_real, and the household totals were then
* divided by the CPI a second time.
* The intermediate file is now a true tempfile instead of temphh.dta written
* into (and erased from) the working directory.
tempfile hh_income
preserve
	keep w2earn earn cpi hhidpn year
	duplicates drop hhidpn year, force		// one row per person-year
	gen hhid = int(hhidpn/100)
	egen maxy   = rowmax(w2earn earn)		// per person: larger of the two earnings measures
	egen hh_inc = total(maxy), by(hhid year) missing
	egen hh_ern = total(earn), by(hhid year) missing
	gcollapse (mean) hh_inc hh_ern cpi, by(hhid year)
	gen hh_inc_real = hh_inc/(cpi/100)
	gen hh_ern_real = hh_ern/(cpi/100)
	keep hh_* hhid year
	sort hhid year
	save `hh_income'
restore

* Real individual earnings; the nominal versions are dropped
foreach i in w2earn earn {
	gen `i'_real = `i'/(cpi/100)
	drop `i'
}

* Real versions of the monetary HRS items, deflated with the CPI of the row's year
foreach i in oopmd oopmdo totmd mrprem prprm1 prprm2 prprm3 atotb astck achck acd abond itot issdi isdi issi {
	gen `i'_appl_real = `i'_appl/(cpi/100)
	gen `i'_intvw_real = `i'_intvw/(cpi/100)
	}

* Attach the household measures to every row of the household-year
gen hhid = int(hhidpn/100)
sort hhid year
merge m:1 hhid year using `hh_income', nogenerate

** combine longest tenure occupation into one for applicants
** generate longest tenure occupation using the earliest occupation codes
**
** For each level (appl, intvw, x = year level) the combined code starts from
** longest_occ_<level>. Where that equals .b it is filled from the census-2000
** coding (longest_occ_b_<level>), and, if still .b, from the census-2010 coding
** (longest_occ_c_<level>). The ONET principal components are then merged on
** the combined code and suffixed with the level.
**
** Two fixes relative to the earlier version:
**  - only the freshly merged ONET variables are renamed. Before, the wildcard
**    rename Phys* / Ksa* also matched variables suffixed in earlier iterations,
**    which produced names such as <var>_appl_intvw_x.
**    NOTE(review): code elsewhere that relies on those compounded names must
**    switch to <var>_appl / <var>_intvw / <var>_x.
**  - keep(master match) stops ONET rows without a matching data row from being
**    appended to the dataset.

ren jlocc_intvw 		longest_occ_intvw
ren jloccb_intvw 		longest_occ_b_intvw
ren jloccc_intvw 		longest_occ_c_intvw

ren longest_occ     longest_occ_x
ren longest_occ_b   longest_occ_b_x
ren longest_occ_c   longest_occ_c_x

* names of the ONET variables to bring in, read once from the using file
quietly describe Phys* Ksa* using "$in\ONETpca", varlist
local onet_vars `r(varlist)'

foreach i in appl intvw x {
        local occ longest_occ_combined_`i'
        local b   longest_occ_b_`i'
        local c   longest_occ_c_`i'

        gen     `occ' = longest_occ_`i'
        label val `occ' OCCUP

        *census 2000
        replace `occ'=1  if `occ'==.b & inlist(`b', 1, 2, 3)
        replace `occ'=2  if `occ'==.b & inlist(`b', 4, 5, 6, 7, 8, 9, 10, 11)
        replace `occ'=3  if `occ'==.b & `b'==17
        replace `occ'=4  if `occ'==.b & `b'==18
        replace `occ'=5  if `occ'==.b & `b'==15
        replace `occ'=6  if `occ'==.b & `b'==13
        replace `occ'=7  if `occ'==.b & `b'==14
        replace `occ'=8  if `occ'==.b & `b'==12
        replace `occ'=9  if `occ'==.b & `b'==16
        replace `occ'=10 if `occ'==.b & `b'==19
        replace `occ'=11 if `occ'==.b & `b'==22
        replace `occ'=12 if `occ'==.b & inlist(`b', 20, 21)
        replace `occ'=13 if `occ'==.b & `b'==23
        replace `occ'=15 if `occ'==.b & `b'==24
        replace `occ'=17 if `occ'==.b & `b'==25

        *census 2010
        replace `occ'=1  if `occ'==.b & inlist(`c', 1, 2)
        replace `occ'=2  if `occ'==.b & inlist(`c', 3, 4, 5, 6, 7, 8, 9, 10)
        replace `occ'=3  if `occ'==.b & `c'==16
        replace `occ'=4  if `occ'==.b & `c'==17
        replace `occ'=5  if `occ'==.b & `c'==14
        replace `occ'=6  if `occ'==.b & `c'==12
        replace `occ'=7  if `occ'==.b & `c'==13
        replace `occ'=8  if `occ'==.b & `c'==11
        replace `occ'=9  if `occ'==.b & `c'==15
        replace `occ'=10 if `occ'==.b & `c'==18
        replace `occ'=11 if `occ'==.b & `c'==20
        replace `occ'=12 if `occ'==.b & `c'==19
        replace `occ'=13 if `occ'==.b & `c'==21
        replace `occ'=15 if `occ'==.b & `c'==22
        replace `occ'=17 if `occ'==.b & `c'==23

        * NOTE(review): Stata missing values compare greater than any number, so
        * this also recodes every remaining missing code to 0. Behavior is kept
        * as is; confirm that this is intended.
        replace `occ' = 0 if `occ' > 3000

        gen SOC = `occ'

        * add ONET principal components for physical and cognitive tasks
        merge m:1 SOC using "$in\ONETpca", keepusing(`onet_vars') keep(master match) nogenerate
        foreach v of local onet_vars {
                ren `v' `v'_`i'
        }
        drop SOC
}



* Remove the remaining wave-specific and helper variables
drop inw* rpmbmi*   hastck*   habond*   hachck*   hacd*     hatotb*   hitot*    rhenum* ///
rhigov*   rgovmr*   rgovmd*   rgovva*   rhiothp*  rprprm*   rmrprem*  rprpcnt*  rprprm1* ///
rprprm2*  rprprm3*  rinlbrf*  rtotmd*   iy*       ddist*

compress

* Attach the SSI and SSDI receipt files by person-year.
* These used the old-style "merge varlist using" syntax, which needs both files
* sorted and pairs rows silently when the key is not unique. merge m:1 states
* the intent (several data rows per person-year, one using row) and stops with
* an error if a using file has duplicate hhidpn-year keys. keep(master match)
* replaces the former "drop if _merge==2".
merge m:1 hhidpn year using "$data\receive_ssi_ssa", keep(master match) nogenerate
merge m:1 hhidpn year using "$data\receive_ssdi_ssa", keep(master match) nogenerate

format hhidpn %12.0g

sort hhidpn record year

****************************************
****************************************
save complete_data_final_2_12_v1.dta, replace
****************************************
****************************************
* final dataset is complete
* has at least one obs per person per year, with data on income, f831 application,
* and if rand interview happened after f831 application, then also closest-in-time interview data
* note that if more than one f831 application or appeal in given year, then there are
* multiple rows per person per year (one for each f831 application/appeal)
******************************************************************************
*Now construct the 12 months before dataset
******************************************************************************
* Reload the application/income file and attach the HRS extract again, keeping
* only persons present in both files.
cd "$data"
use "f831_and_inc.dta", clear
merge m:1 hhidpn using temp1, keep(match) nogenerate

* Earnings clean-up: missing earnings become 0; earnings and experience are
* then set to missing for years later than one year after the year of death.
replace w2earn = 0 if w2earn==.
replace earn   = 0 if earn==.|earn==.m
foreach v in w2earn earn experience experience_alt {
	replace `v' = . if year>year_died+1 & year_died!=.
}

* Closest interview on or BEFORE the F831 application: interviews dated after
* the application are excluded, and the remaining distances are stored as
* positive numbers of days.
gen wave_appl = .
scalar define month_threshold = 12		/*Distance between disability report in HRS and date of application*/
forvalues w = 1/15 {
	generate dist`w' = interview_date`w' - date_app
	replace  dist`w' = . if dist`w' > 0		// only interested in interview that happens *before* application
	replace  dist`w' = abs(dist`w')
}

egen row_min = rowmin(dist1-dist15)
drop ragey* riwend*

* ---------------------------------------------------------------------------
* <name>_appl variables: value of each HRS item at the interview whose distance
* to the application equals row_min. If several waves tie, the highest wave
* number wins because waves are processed in ascending order.
* ---------------------------------------------------------------------------

* 1) empty placeholders (creation order kept as in the list below)
foreach v in shlt hlthlm jcocc jcten jcind jlocc jlten jlind mstat hibp diab cancr lung heart strok psych arthr ///
             hosp hspnit walkra dressa stoopa bmi beda shopa mealsa hlthlm_direct hlth_temp cantwork proxy ///
             jcoccb jcoccc jloccb jloccc jlasty lbrf doctim oopmd oopmdo oopmdsp oopmdosp totmd higov govmr govmd govva mrprem prpcnt ///
             jyears jphys jlift jstoop jsight jstres hospcov nurscov surgcov doctcov dentcov drugcov homecov helpcov gisp pisp ///
             prprm1 prprm2 prprm3 hiothp henum atotb atotf astck achck acd abond pmbmi iwstat itot issdi isdi issi interview_date inlbrf inw covr {
	quietly generate `v'_appl = .
}

* 2) source items grouped by how the wave-specific variable is named
* stored as r<name><wave>
local r_items shlt hlthlm jcocc jcten jcind jlocc jlten jlind mstat hibp diab cancr lung heart strok psych arthr ///
              hosp hspnit walkra dressa stoopa bmi beda shopa mealsa hlthlm_direct hlth_temp cantwork proxy ///
              jcoccb jcoccc jloccb jloccc jlasty lbrf doctim oopmd oopmdo oopmdsp oopmdosp totmd higov govmr govmd govva mrprem prpcnt ///
              jyears jphys jlift jstoop jsight jstres gisp pisp ///
              prprm1 prprm2 prprm3 hiothp henum pmbmi iwstat issdi isdi issi inlbrf covr
* stored as <name><wave>
local plain_items hospcov nurscov surgcov doctcov dentcov drugcov homecov helpcov
* stored as h<name><wave>
local h_items atotb astck achck acd abond itot atotf

* 3) copy the values from the closest wave (the last three replaces are
*    deliberately not quiet, as in the original, so their counts reach the log)
forvalues w = 1/15 {
	local closest dist`w'==row_min & row_min!=.
	foreach v of local r_items {
		quietly replace `v'_appl = r`v'`w' if `closest'
	}
	foreach v of local plain_items {
		quietly replace `v'_appl = `v'`w' if `closest'
	}
	foreach v of local h_items {
		quietly replace `v'_appl = h`v'`w' if `closest'
	}
	replace wave_appl           = `w'               if `closest'
	replace inw_appl            = inw`w'            if `closest'
	replace interview_date_appl = interview_date`w' if `closest'
}

* 4) fold the "disputes previous report" codes into plain 0/1
foreach v in hibp diab cancr lung heart strok psych arthr {
	quietly replace `v'_appl = 1 if `v'_appl == 3 //disputes previous report and now has condition
	quietly replace `v'_appl = 0 if `v'_appl == 4 //disputes previous report and now does not
}


* ---------------------------------------------------------------------------
* <name>_intvw variables: value of each HRS item taken from the wave whose
* interview falls in the same calendar year as the row (iy<wave> == year).
* ---------------------------------------------------------------------------

* 1) empty placeholders (creation order kept as in the list below)
foreach v in shlt hlthlm jcocc jcten jcind jlocc jlten jlind mstat hibp diab cancr lung heart strok psych arthr ///
             hosp hspnit walkra dressa stoopa bmi beda shopa mealsa hlthlm_direct hlth_temp cantwork proxy ///
             jcoccb jcoccc jloccb jloccc jlasty lbrf doctim oopmd oopmdo oopmdsp oopmdosp totmd higov govmr govmd govva mrprem prpcnt ///
             jyears jphys jlift jstoop jsight jstres hospcov nurscov surgcov doctcov dentcov drugcov homecov helpcov gisp pisp ///
             prprm1 prprm2 prprm3 hiothp henum atotb atotf astck achck acd abond pmbmi iwstat itot issdi isdi issi interview_date inlbrf inw covr {
	quietly generate `v'_intvw = .
}

* 2) source items grouped by how the wave-specific variable is named
* stored as r<name><wave>
local r_items shlt hlthlm jcocc jcten jcind jlocc jlten jlind mstat hibp diab cancr lung heart strok psych arthr ///
              hosp hspnit walkra dressa stoopa bmi beda shopa mealsa hlthlm_direct hlth_temp cantwork proxy ///
              jcoccb jcoccc jloccb jloccc jlasty lbrf doctim oopmd oopmdo oopmdsp oopmdosp totmd higov govmr govmd govva mrprem prpcnt ///
              jyears jphys jlift jstoop jsight jstres gisp pisp ///
              prprm1 prprm2 prprm3 hiothp henum pmbmi iwstat issdi isdi issi inlbrf covr
* stored as <name><wave>
local plain_items hospcov nurscov surgcov doctcov dentcov drugcov homecov helpcov
* stored as h<name><wave>
local h_items atotb astck achck acd abond itot atotf

* 3) calendar year of each interview, then copy values where it equals the row year
forvalues w = 1/15 {
	quietly generate iy`w' = year(interview_date`w')
	foreach v of local r_items {
		quietly replace `v'_intvw = r`v'`w' if iy`w'==year
	}
	foreach v of local plain_items {
		quietly replace `v'_intvw = `v'`w' if iy`w'==year
	}
	foreach v of local h_items {
		quietly replace `v'_intvw = h`v'`w' if iy`w'==year
	}
	quietly replace inw_intvw            = inw`w'            if iy`w'==year
	quietly replace interview_date_intvw = interview_date`w' if iy`w'==year
}

* 4) fold the "disputes previous report" codes into plain 0/1
foreach v in hibp diab cancr lung heart strok psych arthr {
	quietly replace `v'_intvw = 1 if `v'_intvw == 3 //disputes previous report and now has condition
	quietly replace `v'_intvw = 0 if `v'_intvw == 4 //disputes previous report and now does not
}



* Rename the application-level variables to descriptive names.
* The list is read as consecutive (old new) pairs and applied in order; pairs
* with identical old and new names are kept exactly as in the original script.
local pairs shlt_appl hlt_appl ///
            hlthlm_appl hlthlm_appl ///
            interview_date_appl intvw_dt_appl ///
            interview_date_intvw intvw_dt_intvw ///
            jcocc_appl curr_occ_appl ///
            jcten_appl curr_tenure_appl ///
            jcind_appl curr_ind_appl ///
            jlten_appl longest_tenure_appl ///
            jlind_appl longest_ind_appl ///
            mstat_appl mar_stat_at_appl ///
            hlthlm_direct_appl hlthlm_direct_appl ///
            hlth_temp_appl hlth_temp_appl ///
            cantwork_appl cantwork_appl ///
            proxy_appl proxy_appl ///
            jcoccb_appl curr_occ_b_appl ///
            jcoccc_appl curr_occ_c_appl ///
            jlocc_appl longest_occ_appl ///
            jloccb_appl longest_occ_b_appl ///
            jloccc_appl longest_occ_c_appl ///
            jlasty_appl last_lfp_yr_appl ///
            lbrf_appl lfp_status_appl ///
            doctim_appl doc_visits_appl ///
            row_min days_to_interview_appl
while "`pairs'" != "" {
	gettoken old pairs : pairs
	gettoken new pairs : pairs
	rename `old' `new'
}

label variable days_to_interview_appl "Days between interview and application"

* Attach the occupation and labour-force value labels
foreach stem in curr_occ longest_occ {
	label values `stem'_appl   OCCUP
	label values `stem'_b_appl OCCUPB
	label values `stem'_c_appl OCCUPC
}
label values lfp_status_appl LBRF



* Flag rows whose interview falls within month_threshold months (31-day months)
* of the application; rows with no distance recorded stay at 0.
gen within_threshold_appl = 0
replace within_threshold_appl = 1 if days_to_interview_appl < month_threshold*31 & !missing(days_to_interview_appl)

* Same flag for fixed 6- and 9-month windows
foreach m in 6 9 {
	gen within_`m'mo = 0
	replace within_`m'mo = 1 if days_to_interview_appl < `m'*31 & !missing(days_to_interview_appl)
}

* Single categorical indicator for doctor-diagnosed health conditions, built
* once for the application-time (_appl) and once for the interview-year
* (_intvw) variables.  0 = all eight conditions reported as 0.  The codes are
* assigned in ascending order, so when several conditions are present the
* highest code wins.
foreach ref in appl intvw {
	gen bs_sf_`ref' = .
	replace bs_sf_`ref' = 0 if hibp_`ref'==0 & psych_`ref'==0 & heart_`ref'==0 & arthr_`ref'==0 ///
							 & diab_`ref'==0 & lung_`ref'==0  & strok_`ref'==0 & cancr_`ref'==0
	local code 0
	foreach cond in hibp psych heart arthr diab lung strok cancr {
		local ++code
		replace bs_sf_`ref' = `code' if `cond'_`ref'==1
	}
	label define bs_sf  0 "None" 1 "High BP" 2 "Psych.cond." 3 "Heart" 4 "Arthritis" 5 "Diabetis" ///
						6 "Lung disease" 7 "Stroke" 8 "Cancer" , replace
	label values bs_sf_`ref' bs_sf
}


* Calendar-year copies of the wave-level interview variables.
* The variable list is defined once so that the columns that are created and
* the columns that are filled can never drift apart (it was previously typed
* out twice, in different order).
local yearly_vars rissdi rissi risdi rshlt rhlthlm interview_date rmstat rhibp rdiab rcancr rlung rheart rstrok rpsych rarthr ///
			 rhosp rhspnit rwalkra rdressa rstoopa rbmi rbeda rshopa rmealsa rhlthlm_direct rhlth_temp rcantwork rproxy  ///
			 rjcocc rjcoccb rjcoccc rjlocc rjloccb rjloccc rjlasty rlbrf rdoctim  roopmd roopmdo rtotmd rhigov rgovmr rgovmd rgovva rmrprem rprpcnt ///
			 rjyears rjphys rjlift rjstoop rjsight rjstres hospcov nurscov surgcov doctcov dentcov drugcov homecov helpcov ///
			 rprprm1 rprprm2 rprprm3 rhiothp rhenum hatotb hastck hachck hacd habond hatotf rcovr rgisp rpisp roopmdsp roopmdosp

gen wave = .
foreach v of local yearly_vars {
	qui gen `v'=.
}

* Signed distance in years from each wave's interview to the row's calendar
* year; interviews that take place in a later year are discarded.
forvalues j=1(1)15	{
	g ddist`j'=(year(interview_date`j')-year)
	replace ddist`j'=. if ddist`j'>0 // only interested in interview that happens *before* a given calendar year
	}

* NOTE(review): after the step above every ddist`j' is <= 0, so rowmin() picks
* the most negative distance, i.e. the EARLIEST interview on or before `year',
* not the most recent one (rowmax() would give the most recent).  Left
* unchanged because downstream variables (wave, yrs_to_interview) depend on
* it -- please confirm the intent.
egen row_min = rowmin(ddist1-ddist15)

* Copy the selected wave's values into the calendar-year columns.  If two waves
* tie on distance, the later wave overwrites the earlier one.
forvalues j=1(1)15	{
	foreach v of local yearly_vars {
		qui replace `v' = `v'`j' if ddist`j'==row_min & row_min!=.
	}
	* wave does not depend on the variable being filled: set it once per wave
	qui replace wave = `j' if ddist`j'==row_min & row_min!=.
}

* Rename the calendar-year interview variables.

* (a) variables that simply lose their leading "r"
foreach v in hlthlm hlthlm_direct hlth_temp cantwork proxy oopmd oopmdo covr jlift ///
			 jyears jphys jstoop jsight jstres gisp pisp ///
			 hibp diab cancr lung heart strok psych arthr ///
			 hosp hspnit walkra dressa stoopa bmi beda shopa mealsa {
	rename r`v' `v'
}

* (b) variables that get a descriptive name
rename interview_date 	intvw_dt
rename rmstat 			marital_status
rename rissdi 			received_ssidi
rename rissi 			received_ssi
rename risdi 			received_di
rename rjlasty 			last_lfp_yr
rename rlbrf 			lfp_status
rename rdoctim 			doc_visits

* (c) occupation of current and longest-held job under the three codings
rename rjcocc 			curr_occ
rename rjcoccb 			curr_occ_b
rename rjcoccc 			curr_occ_c
rename rjlocc  			longest_occ			/*1980 census def*/
rename rjloccb 			longest_occ_b		/*2000 census def*/
rename rjloccc 			longest_occ_c		/*2010 census def*/

* value labels
foreach job in curr longest {
	label values `job'_occ   OCCUP
	label values `job'_occ_b OCCUPB
	label values `job'_occ_c OCCUPC
}
label values lfp_status LBRF

* fold the two "disputes previous report" codes into the 0/1 coding
foreach cond in hibp diab cancr lung heart strok psych arthr {
	quietly replace `cond' = 1 if `cond' == 3 	// disputes previous report and now has condition
	quietly replace `cond' = 0 if `cond' == 4 	// disputes previous report and now does not
}

* Calendar-year version of the doctor-diagnosed condition indicator
* (0 = all eight conditions are 0; with several conditions the highest code wins).
* Reuses the value label bs_sf, which must already be defined at this point.
gen bs_sf = .
replace bs_sf = 0 if hibp==0 & psych==0 & heart==0 & arthr==0 & diab==0 & lung==0 & strok==0 & cancr==0
local code 0
foreach cond in hibp psych heart arthr diab lung strok cancr {
	local ++code
	replace bs_sf = `code' if `cond'==1
}
label values bs_sf bs_sf

* Keep the year distance to the selected interview under a descriptive name
rename row_min yrs_to_interview

* Flag rows whose distance is recorded and below 2
gen within_threshold = 0
replace within_threshold = 1 if yrs_to_interview < 2 & !missing(yrs_to_interview)

* Insurance-coverage items: move "cov" from the end of the stem to the front,
* for the calendar-year, application and interview-year versions alike.
foreach kind in hosp nurs surg doct dent drug home help {
	rename (`kind'cov  `kind'cov_appl  `kind'cov_intvw) ///
		   (cov`kind'  cov`kind'_appl  cov`kind'_intvw)
}

* Drop the wave-level source columns (and helper distances) that are no longer
* needed.  This has to run after the renames above so that the *cov* patterns
* only catch the remaining wave-level columns.
drop riss* risdi* rshl* rhl* rmst* riwstat* rcantwork* rproxy* rlbrf* rdoctim* ///
	 interview_date* dist* ///
	 rjc* rjl* rjyears* rjphys* rjstoop* rjsight* rjstres* ///
	 rhibp* rdiab* rcancr* rlung* rheart* rstrok* rpsych* rarthr* ///
	 rhosp* rhspnit* rwalkra* rdressa* rstoopa* rbmi* rbeda* rshopa* rmealsa* ///
	 roopmd* roopmdo* rgisp* rpisp* ///
	 hospcov* nurscov* surgcov* doctcov* dentcov* drugcov* homecov* helpcov*
********************************************************************************************************************************************************************

* Interview year: filled on rows whose calendar year matches a wave's interview year
gen interview_year = .
forvalues w = 1/15 {
	replace interview_year = iy`w' if iy`w'==year
}


* Obesity (BMI >= 30) and underweight (BMI < 18.5) dummies for the application,
* interview-year and calendar-year BMI.  The "< 300" condition leaves the dummy
* missing when BMI is missing (original author's note: no one actually has a
* BMI over 300).
foreach b in bmi_appl bmi_intvw bmi {
	local sfx = substr("`b'", 4, .) 	// "_appl", "_intvw" or empty
	gen obese`sfx'   = `b' >= 30  if `b' < 300
	gen underwt`sfx' = `b' < 18.5 if `b' < 300
}


* Clean up demographics: give every row of a person that person's modal
* education value.  Where mode() leaves the result missing, fall back to the
* first (lowest) education value found within the person.
tempvar educ_mode
sort hhidpn raeduc, stable
by hhidpn (raeduc): egen `educ_mode' = mode(raeduc)
sort hhidpn `educ_mode' raeduc, stable
by hhidpn `educ_mode' (raeduc): replace `educ_mode' = raeduc[1] if `educ_mode'==.
replace raeduc = `educ_mode'
drop `educ_mode'

* Age in a given calendar year; afterwards the birth year takes over the name dob_y
gen age = year - rabyear
drop rabdate dob_y
rename rabyear dob_y

sort hhidpn record year rid al, stable

* Collapse the body-system codes: 5 and 6 become 15; the listed residual codes become 16
replace bs = 16 if inlist(bs, 2, 7, 8, 10, 19, 20, 99)	/*"Other disability"*/
replace bs = 15 if inlist(bs, 5, 6)

label define bs 	1 "Musculoskeletal" 3 "Respiratory" 4 "Cardiov." 15 "Dig. \& Urin." 16 "Other" ///
					9 "Endocrine" 11 "Neurol." 12 "Mental dis." 13 "Cancer" 14 "Immune def." , replace
label values bs bs

* Education dummy
gen college = .
replace college = 0 if inlist(raeduc, 1, 2, 3)
replace college = 1 if inlist(raeduc, 4, 5)
label define college 0 "At most High School degree" 1 "Some college or more"
label values college college

* Race dummy.  Three respondents whose race code is above 3 (which includes
* missing) are first set to 3.	/*From confidential race data*/
foreach id in 521804020 909765010 902295020 {
	replace raracem = 3 if raracem>3 & hhidpn==`id'
}

gen white = .
replace white = 1 if raracem==1
replace white = 0 if inlist(raracem, 2, 3)
label define white 1 "White" 0 "Non-white"
label values white white

* Gender dummy
gen female = .
replace female = 1 if ragender==2
replace female = 0 if ragender==1
label define female 1 "female" 0 "male"
label values female female


* Married / widowed dummies at three reference points: the calendar year
* (no suffix), the application (_appl) and the interview year (_intvw).
* Marital status 1 = married, 7 = widowed; dummies stay missing when the
* status equals system missing (.).
rename mstat_intvw mar_stat_at_intvw
foreach ref in year appl intvw {
	if "`ref'" == "year" {
		local src marital_status
		local sfx
	}
	else {
		local src mar_stat_at_`ref'
		local sfx _`ref'
	}

	gen married`sfx' = .
	replace married`sfx' = 1 if `src'==1
	replace married`sfx' = 0 if `src'!=1 & `src'!=.
	label define married`sfx' 1 "married" 0 "unmarried"
	label values married`sfx' married`sfx'

	gen widowed`sfx' = .
	replace widowed`sfx' = 1 if `src'==7
	replace widowed`sfx' = 0 if `src'!=7 & `src'!=.
	label define widowed`sfx' 1 "widowed" 0 "not widowed"
	label values widowed`sfx' widowed`sfx'
}


* SSI application dummy, derived from rid: 16 -> 1; 2 or 216 -> 0; otherwise missing
gen ssi = .
replace ssi = 1 if rid==16
replace ssi = 0 if inlist(rid, 2, 216)
label variable ssi "SSI"

* Label some variables
label variable experience 		"Experience using y"
label variable experience_alt 	"Experience using y+w2"
label variable success 			"Awarded"
label variable nosuccess 		"Not awarded"
foreach ref in intvw appl {
	label variable bs_sf_`ref' "Health cond. reported to HRS"
}

* Attach the CPI by calendar year; years that exist only in the CPI file are
* not kept and no _merge variable is left behind.
merge m:1 year using $in\cpi.dta, keep(master match) nogenerate

// Construct household income measures and merge them back by household-year.
//   hh_inc = household sum of max(w2earn, earn) per person
//   hh_ern = household sum of earn
// Both are also expressed in real terms (deflated by cpi/100).
//
// The household file is written to a real Stata temporary file.  The original
// bare `tempfile' named no macro, so the data went to a permanent temphh.dta in
// the working directory that had to be erased by hand (and was left behind
// whenever the script stopped before reaching the erase).
preserve
	keep w2earn earn cpi hhidpn year
	duplicates drop hhidpn year, force 	// one row per person-year (several rows exist when there are several applications)
	gen hhid = int(hhidpn/100)
	egen maxy   = rowmax(w2earn earn)
	egen hh_inc = total(maxy), by(hhid year) missing 	// missing: all-missing households stay missing instead of 0
	egen hh_ern = total(earn), by(hhid year) missing
	gcollapse (mean) hh_inc hh_ern cpi, by(hhid year)
	g hh_inc_real = hh_inc/(cpi/100)
	g hh_ern_real = hh_ern/(cpi/100)
	keep hh_* hhid year
	sort hhid year
	tempfile temphh
	save `temphh'
restore

gen hhid = int(hhidpn/100)
sort hhid year
merge m:1 hhid year using `temphh'
drop _merge

* Convert nominal amounts to real terms with the row's CPI (cpi/100); the
* nominal columns are dropped afterwards.

* person-level earnings
foreach v in w2earn earn {
	gen `v'_real = `v'/(cpi/100)
	drop `v'
}

* application-time and interview-year money amounts
foreach stub in oopmd oopmdo oopmdsp oopmdosp totmd mrprem prprm1 prprm2 prprm3 ///
				atotb atotf astck achck acd abond itot issdi isdi issi {
	foreach ref in appl intvw {
		quietly gen `stub'_`ref'_real = `stub'_`ref'/(cpi/100)
	}
	drop `stub'_appl `stub'_intvw
}


** Combine the longest-tenure occupation into one series for each reference
** point (appl = application, intvw = interview year, x = calendar year).
** The series starts from the earliest occupation coding (longest_occ_*); where
** that is .b, it is filled from the census-2000 coding (longest_occ_b_*) and
** then from the census-2010 coding (longest_occ_c_*).

ren jlocc_intvw 	longest_occ_intvw
ren jloccb_intvw 	longest_occ_b_intvw
ren jloccc_intvw 	longest_occ_c_intvw

ren longest_occ     longest_occ_x
ren longest_occ_b   longest_occ_b_x
ren longest_occ_c   longest_occ_c_x

* Crosswalks into the combined coding, one entry per target code, written as
*   <combined code>:<source code>[,<source code>...]
* census 2000 (longest_occ_b_*)
local xw_b 1:1,2,3 2:4,5,6,7,8,9,10,11 3:17 4:18 5:15 6:13 7:14 8:12 9:16 ///
		   10:19 11:22 12:20,21 13:23 15:24 17:25
* census 2010 (longest_occ_c_*)
local xw_c 1:1,2 2:3,4,5,6,7,8,9,10 3:16 4:17 5:14 6:12 7:13 8:11 9:15 ///
		   10:18 11:20 12:19 13:21 15:22 17:23

foreach i in appl intvw x {
	gen 	longest_occ_combined_`i' = longest_occ_`i'
	label val longest_occ_combined_`i' OCCUP

	* Fill .b from the 2000 coding first, then from the 2010 coding.  A value
	* filled from the 2000 coding is no longer .b, so the 2010 pass skips it.
	foreach scheme in b c {
		foreach entry of local xw_`scheme' {
			gettoken target codes : entry, parse(":")
			gettoken colon  codes : codes, parse(":")
			replace longest_occ_combined_`i' = `target' ///
				if longest_occ_combined_`i'==.b & inlist(longest_occ_`scheme'_`i', `codes')
		}
	}

	* Missing values compare as larger than any number in Stata, so this also
	* turns every remaining missing code into 0.
	replace longest_occ_combined_`i'= 0 if longest_occ_combined_`i' > 3000

	gen SOC = longest_occ_combined_`i'

	* Remember the Phys*/Ksa* columns that already exist from earlier passes.
	* A plain wildcard rename would match them again and stack suffixes
	* (e.g. ..._appl_intvw_x).
	local prior
	foreach stub in Phys Ksa {
		capture unab found : `stub'*
		if !_rc local prior `prior' `found'
	}

	* Add ONET principal components for physical and cognitive tasks.
	* keep(master match): occupation codes that appear only in the ONET file
	* must not be appended as extra observations.
	merge m:1 SOC using $in\ONETpca, keepusing(Phys* Ksa*) keep(master match) nogenerate

	* Suffix only the columns that this merge has just added
	unab merged : Phys* Ksa*
	local fresh : list merged - prior
	foreach var of local fresh {
		ren `var' `var'_`i'
	}
	drop SOC
}


* Remove the remaining wave-level columns and helper variables
drop inw* iy* ddist* rinlbrf* rpmbmi* rhenum* rcovr* rtotmd* ///
	 hastck* habond* hachck* hacd* hatotb* hatotf* hitot* ///
	 rhigov* rgovmr* rgovmd* rgovva* rhiothp* ///
	 rprprm* rprprm1* rprprm2* rprprm3* rmrprem* rprpcnt*

compress

sort hhidpn record year

format %12.0g hhidpn


****************************************
****************************************
save complete_data_final_12mb_v1.dta, replace 
****************************************
****************************************
* The final dataset is now complete:
*  - at least one observation per person per year, with data on income and
*    the F831 application;
*  - if a RAND HRS interview took place after the F831 application, the
*    closest-in-time interview data are included as well;
*  - if a person has more than one F831 application or appeal in a given year,
*    there are multiple rows for that person-year (one per application/appeal).


* Remove intermediate files from the working directory
foreach f in receive_ssdi_ssa receive_ssi_ssa track_demogs {
	erase `f'.dta
}

log close